import glob
import os
import shutil
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import time
import random
import cv2
import h5py
import pandas as pd
from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, Dropout, Activation, Flatten, Input, GlobalAveragePooling2D
from keras.utils import np_utils
from keras.preprocessing import image
from keras.optimizers import SGD
from keras.utils.data_utils import get_file
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import shuffle
%matplotlib inline
t1=time.time()
train_dir = glob.glob('image/train/*/*.jpg')
for src_name in train_dir:
first_dot = src_name.find('.')
second_dot = src_name.rfind('.')
# print(src_name)
# print(first_dot)
# print(second_dot)
name_num = int(src_name[first_dot+1:second_dot])
# print(name_num)
dst_name = src_name[0:first_dot+1]+ "{0:05d}".format(name_num) + src_name[second_dot:]
# print(dst_name)
os.rename(src_name, dst_name)
t2=time.time()
print(round(t2 - t1, 2), 'seconds to rename !')
Download the datasets from https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/ and arrange them as follows:
├── image
│ ├── test
│ │ └── test [12500 images]
│ ├── train [25000 images]
│ │ ├── cat [12500 images]
│ │ └── dog [12500 images]
t1 = time.time()

# Headless ResNet50 pre-trained on ImageNet; global average pooling collapses
# the convolutional output into one feature vector per image.
base_model = ResNet50(input_tensor=Input((224,224,3)), weights='imagenet', include_top=False)
model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

# Scale pixels to [0, 1]. shuffle=False keeps generator order stable so that
# extracted features stay aligned with .filenames / .classes.
gen = ImageDataGenerator(rescale=1.0/255)
train_generator = gen.flow_from_directory('image/train', target_size=(224,224),
                                          shuffle=False, batch_size=25)
test_generator = gen.flow_from_directory("image/test", target_size=(224,224),
                                         shuffle=False, batch_size=25, class_mode=None)

# One-off bottleneck-feature extraction; uncomment to regenerate the .h5 files.
# train = model.predict_generator(train_generator, 500)
# with h5py.File("bottleneck_ResNet50_train.h5") as h:
#     h.create_dataset("train", data=train)
#     h.create_dataset("label", data=train_generator.classes)
# test = model.predict_generator(test_generator, 250)
# with h5py.File("bottleneck_ResNet50_test.h5") as h:
#     h.create_dataset("test", data=test)

t2 = time.time()
print(round(t2 - t1, 2), ' seconds')
# Sanity check: print one filename and display a full batch from the
# training generator.
print(train_generator.filenames[2])

# next(train_generator) works on Python 2 and 3 alike; the .next() method
# call is the legacy Python-2 iterator protocol.
x, y = next(train_generator)
plt.figure(figsize=(16, 16))
for i, (img, label) in enumerate(zip(x, y)):
    plt.subplot(5, 5, i + 1)
    # No per-image title: flow_from_directory defaults to 'categorical'
    # (one-hot) labels here, so `label` is not a 0/1 scalar to compare.
    plt.axis('off')
    plt.imshow(img, interpolation="nearest")
# Merge precomputed bottleneck features from three backbones into a single
# feature matrix (concatenated along the feature axis).
train_feature = []
label = []
test_feature = []
for filename in ["gap_ResNet50.h5", "gap_InceptionV3.h5", "gap_Xception.h5"]:
    with h5py.File(filename, 'r') as h:
        train_feature.append(np.array(h['train']))
        test_feature.append(np.array(h['test']))
        # Presumably the same label vector is stored in every file,
        # so keeping only the last one read is harmless — verify.
        label = np.array(h['label'])

train_feature = np.concatenate(train_feature, axis=1)
test_feature = np.concatenate(test_feature, axis=1)
print('train_num: ', len(train_feature))
print('label_num: ', len(label))
print('test_num: ', len(test_feature))

# Seed NumPy first: sklearn.utils.shuffle draws from np.random by default,
# so this makes the shuffle reproducible.
np.random.seed(4)
X_train, y_train = shuffle(train_feature, label)
e = 10  # training epochs
# opt = SGD(lr=0.0001, momentum=0.9)
np.random.seed(4)

# Classifier head over the concatenated bottleneck features: dropout for
# regularisation, one sigmoid unit for binary cat-vs-dog output.
input_tensor = Input(X_train.shape[1:])
x = Dropout(0.5)(input_tensor)
x = Dense(1, activation='sigmoid')(x)
my_model = Model(inputs=input_tensor, outputs=x)
my_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # 'adam'

# Checkpoint keeps only the best epoch (by validation loss).
best_model = ModelCheckpoint('best_model.h5', verbose=0, save_best_only=True)
hist = my_model.fit(X_train, y_train,
                    batch_size=50,
                    epochs=e,
                    validation_split=0.2,
                    callbacks=[best_model, TensorBoard(log_dir='union_tensorlog')])
# Learning curves for the first head: accuracy, then loss in a new figure.
plt.plot(hist.history['acc'])
plt.plot(hist.history['val_acc'])
plt.title('Training and validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['acc', 'val_acc'], loc='lower right')

plt.figure()
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title('Training and validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['loss', 'val_loss'], loc='lower right')
plt.show()
# Predict on the UNSHUFFLED training features so index i stays aligned with
# label[i] and train_generator.filenames[i].
y_pred = my_model.predict(train_feature, verbose=1)

# Split the training set: keep "normal" samples, collect likely mislabeled
# outliers — a cat (label 0) scored confidently dog-like, or a dog (label 1)
# scored well below dog. Thresholds 0.6 / 0.8 are hand-tuned.
cat_abnormal = []
dog_abnormal = []
train_normal = []
label_normal = []
# Iterate over the aligned array itself; the original used len(X_train),
# which is the same length but refers to the shuffled copy.
for i in range(len(train_feature)):
    if label[i] == 0 and y_pred[i] > 0.6:
        cat_abnormal.append(train_generator.filenames[i])
    elif label[i] == 1 and y_pred[i] < 0.8:
        dog_abnormal.append(train_generator.filenames[i])
    else:
        train_normal.append(train_feature[i])
        label_normal.append(label[i])

cat_abnormal = np.asarray(cat_abnormal)
dog_abnormal = np.asarray(dog_abnormal)
print(len(cat_abnormal), cat_abnormal)
print(len(dog_abnormal), dog_abnormal)
# Display the suspicious "cat" images, titled with just the file name.
plt.figure(figsize=(10, 16))
for i in range(len(cat_abnormal)):
    plt.subplot(11, 5, i + 1)
    cat_name = 'image/train/' + cat_abnormal[i]
    img = image.load_img(cat_name, target_size=(224, 224))
    plt.axis('off')
    # os.path.basename handles the platform separator; the original
    # rfind('\\') never matches '/'-separated paths, so on Linux the
    # title silently showed the whole path.
    plt.title(os.path.basename(cat_name))
    plt.imshow(img, interpolation="nearest")
# Display the suspicious "dog" images, titled with just the file name.
plt.figure(figsize=(10, 16))
for i in range(len(dog_abnormal)):
    plt.subplot(7, 5, i + 1)
    dog_name = 'image/train/' + dog_abnormal[i]
    img = image.load_img(dog_name, target_size=(224, 224))
    # os.path.basename handles the platform separator; the original
    # rfind('\\') never matches '/'-separated paths on Linux.
    plt.title(os.path.basename(dog_name))
    plt.axis('off')
    plt.imshow(img, interpolation="nearest")
# Retrain the same head on the cleaned (outlier-free) training set.
train_normal = np.asarray(train_normal)
label_normal = np.asarray(label_normal)
print(len(train_normal))

# Reproducible shuffle of the cleaned data (sklearn shuffle uses np.random).
np.random.seed(4)
X_train_2, y_train_2 = shuffle(train_normal, label_normal)

e = 10  # training epochs
np.random.seed(4)

# Identical architecture to the first head: dropout + single sigmoid unit.
input_tensor = Input(X_train_2.shape[1:])
x = Dropout(0.5)(input_tensor)
x = Dense(1, activation='sigmoid')(x)
my_model_2 = Model(inputs=input_tensor, outputs=x)
my_model_2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # 'adam'

best_model_2 = ModelCheckpoint('best_model_2.h5', verbose=0, save_best_only=True)
hist = my_model_2.fit(X_train_2, y_train_2,
                      batch_size=50,
                      epochs=e,
                      validation_split=0.2,
                      callbacks=[best_model_2, TensorBoard(log_dir='union_tensorlog')])
# Learning curves for the retrained head: accuracy, then loss in a new figure.
plt.plot(hist.history['acc'])
plt.plot(hist.history['val_acc'])
plt.title('Training and validation accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['acc', 'val_acc'], loc='lower right')

plt.figure()
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title('Training and validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['loss', 'val_loss'], loc='lower right')
plt.show()
# Predict the test set with the retrained head. Clip probabilities away from
# 0/1 so the Kaggle log-loss metric cannot explode on confident mistakes.
y_pred_2 = my_model_2.predict(test_feature, verbose=1)
y_pred_2 = y_pred_2.clip(min=0.005, max=0.995)

df = pd.read_csv("sample_submission.csv")

# Walk the test directory again purely for .filenames (no rescale needed),
# mapping each prediction row back to its image id.
gen = ImageDataGenerator()
test_generator = gen.flow_from_directory("image/test/", target_size=(224,224),
                                         shuffle=False, batch_size=50, class_mode=None)
for i, fname in enumerate(test_generator.filenames):
    # 'test/1234.jpg' -> 1234; submission rows are 0-based, ids 1-based.
    index = int(fname[fname.rfind('test')+5:fname.rfind('.')])
    # DataFrame.set_value was deprecated in 0.21 and removed in pandas 1.0;
    # .at is the supported scalar accessor. float() unwraps the length-1
    # prediction array into a clean scalar cell.
    df.at[index - 1, 'label'] = float(y_pred_2[i])

df.to_csv('submission_union_2.csv', index=None)
# Spot-check 15 random test images with their predicted class and confidence.
plt.figure(figsize=(12, 5))
for i in range(15):
    # randrange(n) yields a valid 0..n-1 index. The original
    # randint(1, len(test_feature)) is inclusive at the top, so it could
    # return len(test_feature) and raise IndexError (and never sampled 0).
    x = random.randrange(len(test_feature))
    prediction = y_pred_2[x]
    test_name = 'image/test/' + test_generator.filenames[x]
    img = image.load_img(test_name, target_size=(224, 224))
    plt.subplot(3, 5, i + 1)
    if prediction < 0.5:
        plt.title("cat {:.2f}%".format(100 - float(prediction)*100))
    else:
        plt.title("dog {:.2f}%".format(float(prediction)*100))
    plt.axis('off')
    plt.imshow(img)